{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating CCT json database for Sul Ross"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Need to install ExifTool:\n",
    "- Follow the instructions at https://www.sno.phy.queensu.ca/~phil/exiftool/install.html#Unix (use wget to download the tar archive linked from the [home page](https://www.sno.phy.queensu.ca/~phil/exiftool/Image-ExifTool-11.44.tar.gz), then test and install it)\n",
    "- Install its Python wrapper (git clone the repo following the instructions at https://smarnach.github.io/pyexiftool/ and run `python setup.py install` inside the cloned directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from datetime import datetime\n",
    "import json\n",
    "from collections import defaultdict\n",
    "\n",
    "from tqdm import tqdm\n",
    "\n",
    "import exiftool\n",
    "import path_utils  # ai4eutils"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## List image IDs\n",
    "Get the list of image IDs in the folders `Summer2018` and `Presidio001`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data_dir = '/home/beaver/cameratraps/mnt/sulross'  # container mount point\n",
    "image_dirs = os.listdir(data_dir)\n",
    "image_dirs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Collect the paths of all image files found under each sub-directory of data_dir.\n",
    "image_paths = []\n",
    "for dir_name in image_dirs:\n",
    "    dir_path = os.path.join(data_dir, dir_name)\n",
    "    if not os.path.isdir(dir_path):\n",
    "        continue  # entries that are not directories are skipped\n",
    "    print(dir_path)\n",
    "    for file_path in tqdm(path_utils.recursive_file_list(dir_path, bConvertSlashes=False)):\n",
    "        if path_utils.is_image_file(file_path):\n",
    "            image_paths.append(os.path.join(dir_path, file_path))\n",
    "\n",
    "image_paths.sort()\n",
    "len(image_paths)\n",
    "image_paths[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# exclude the test folders - these are subsets of the other two folders Presidio001 and Summer2018\n",
    "# An image ID is the image path relative to data_dir. It is derived from data_dir itself\n",
    "# (not from a repeated string literal) so the cell keeps working if the mount point changes.\n",
    "\n",
    "image_ids = []\n",
    "for image_path in image_paths:\n",
    "    image_id = os.path.relpath(image_path, data_dir)\n",
    "    if not image_id.startswith('test'):\n",
    "        image_ids.append(image_id)\n",
    "len(image_ids)\n",
    "image_ids[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(image_ids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Save the list of image IDs as JSON.\n",
    "image_ids_path = '/home/beaver/cameratraps/data/sulross/20190522_image_ids.json'\n",
    "with open(image_ids_path, 'w') as out_file:\n",
    "    json.dump(image_ids, out_file, indent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# NOTE(review): looks like a scratch cell - in the visible notebook `meta` is only read by the\n",
    "# `len(meta)` check that follows; confirm whether it can be removed.\n",
    "meta = {key: '1' for key in range(0, 100)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(meta)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract labels from EXIF data\n",
    "\n",
    "Used `sulross_get_exif.py` to save the field with the species information from the images. This is saved in `20190522_metadata.json`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the metadata saved in 20190522_metadata.json; the context manager closes the\n",
    "# file handle as soon as the JSON has been parsed.\n",
    "metadata_path = '/Users/siyuyang/Source/temp_data/CameraTrap/engagements/SulRoss/20190522/20190522_metadata.json'\n",
    "with open(metadata_path) as f:\n",
    "    image_id_to_metadata = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(image_id_to_metadata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "image_id_to_species = {}\n",
    "no_species = []\n",
    "for image_id, metadata in image_id_to_metadata.items():\n",
    "    # the first entry of the form 'Species|<name>' decides the label\n",
    "    label = None\n",
    "    for entry in metadata:\n",
    "        fields = entry.split('|')\n",
    "        if len(fields) == 2 and fields[0] == 'Species':\n",
    "            label = fields[1]\n",
    "            break\n",
    "\n",
    "    if label is None:\n",
    "        # no 'Species|' entry at all: remember the image and label it as ''\n",
    "        no_species.append((image_id, metadata))\n",
    "        label = ''\n",
    "    elif label == 'None':\n",
    "        label = ''\n",
    "    image_id_to_species[image_id] = label\n",
    "len(image_id_to_species)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(no_species)  # number of images without EXIF field that says \"Species|\" - assume empty...\n",
    "\n",
    "# Most empty images are denoted by \"Species|None\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "no_species[100]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Spot checked that these are empty of animals."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "all_species = set(image_id_to_species.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(all_species)\n",
    "all_species"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "name_change = {\n",
    "    'Popcupine': 'Porcupine',\n",
    "    'Blacktailed jackrabbit': 'Black-tailed Jackrabbit',\n",
    "    '': 'empty'\n",
    "}\n",
    "\n",
    "# lower-case all species names; get rid of the leading _ in some of them like _Skunk"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Image IDs look like `Presidio001/Cam016/Presidio001__Cam016__2018-03-05__11-43-58(11).JPG`. The part before the last `-`, i.e. `Presidio001/Cam016/Presidio001__Cam016__2018-03-05__11-43`, is used as the sequence ID, and the number in parentheses is the frame number."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_info_from_image_name(image_id):\n",
    "    \"\"\"Parse sequence, frame, timestamp and location information out of an image ID.\n",
    "\n",
    "    Image IDs look like\n",
    "    'Presidio001/Cam016/Presidio001__Cam016__2018-03-05__11-43-58(11).JPG'.\n",
    "\n",
    "    Args:\n",
    "        image_id: image path with '/' as the separator; the file name has at least\n",
    "            four '__'-separated fields and ends in '(<frame number>)' before the extension.\n",
    "\n",
    "    Returns:\n",
    "        A tuple (seq_id, frame_num, dt, location):\n",
    "        seq_id: image_id without its extension, cut off at the last '-'.\n",
    "        frame_num: the integer between the last '(' and the following ')'.\n",
    "        dt: timestamp string such as '2018-03-05 11:43:58'.\n",
    "        location: '<folder>+<camera>', built from the first two path components.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the text after the last '(' is not an integer.\n",
    "        IndexError: if the file name has fewer than four '__'-separated fields or\n",
    "            the ID has fewer than two path components.\n",
    "    \"\"\"\n",
    "    # os.path.splitext strips only the final extension, so a dot elsewhere in the\n",
    "    # path (e.g. in a folder name) does not truncate the name.\n",
    "    image_name = os.path.splitext(image_id)[0]\n",
    "    frame_num = int(image_name.split('(')[-1].split(')')[0])\n",
    "    seq_id_parts = image_name.split('-')\n",
    "    seq_id = '-'.join(seq_id_parts[:-1])\n",
    "\n",
    "    parts = image_id.split('/')\n",
    "\n",
    "    # want '2019-05-19 08:57:43'\n",
    "    file_stem = os.path.splitext(parts[-1])[0]\n",
    "    fields = file_stem.split('(')[0].split('__')\n",
    "    date = fields[2]\n",
    "    time_of_day = fields[3]\n",
    "    dt = '{} {}'.format(date, time_of_day.replace('-', ':'))\n",
    "\n",
    "    # location is folder_name+camera_id\n",
    "    location = '{}+{}'.format(parts[0], parts[1])\n",
    "\n",
    "    return seq_id, frame_num, dt, location"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "image_id = 'Presidio001/Cam016/Presidio001__Cam016__2018-03-05__11-43-58(11).JPG'\n",
    "get_info_from_image_name(image_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "image_id = 'Summer2018/D15/Summer2018__D15__2018-06-23__03-56-24(1).JPG'\n",
    "get_info_from_image_name(image_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "images = []\n",
    "seq_id_to_num_frames = defaultdict(int)\n",
    "species_count = defaultdict(int)\n",
    "\n",
    "for image_id, species in tqdm(image_id_to_species.items()):\n",
    "    # apply the spelling fixes and map the '' label to 'empty'\n",
    "    species = name_change.get(species, species)\n",
    "\n",
    "    # strip only the leading underscore(s), e.g. '_Skunk' -> 'skunk'; underscores inside a\n",
    "    # name are kept instead of cutting the name off at the second '_'\n",
    "    species = species.lstrip('_').lower()\n",
    "    species_count[species] += 1\n",
    "\n",
    "    seq_id, frame_num, dt, location = get_info_from_image_name(image_id)\n",
    "    seq_id_to_num_frames[seq_id] += 1\n",
    "\n",
    "    images.append({\n",
    "        # the image ID without its file extension (only the final extension is removed)\n",
    "        'id': os.path.splitext(image_id)[0],\n",
    "        'file_name': image_id,\n",
    "        'datetime': dt,\n",
    "        'seq_id': seq_id,\n",
    "        'frame_num': frame_num,\n",
    "        'location': location,\n",
    "        'species': species\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "images[1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "species_count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(species_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 'empty' is always category 0. The remaining species get IDs 1..N in alphabetical order so\n",
    "# that the mapping is the same on every run, independent of dict iteration order.\n",
    "category_map = {\n",
    "    'empty': 0\n",
    "}\n",
    "\n",
    "species_names = sorted(s for s in species_count if s != 'empty')\n",
    "for category_id, species_name in enumerate(species_names, start=1):\n",
    "    category_map[species_name] = category_id\n",
    "\n",
    "category_map\n",
    "len(category_map)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "final_images = []\n",
    "annotations = []\n",
    "\n",
    "for image in images:\n",
    "    # each image only has one species label in this dataset, so use image_id as annotation_id\n",
    "\n",
    "    annotations.append({\n",
    "        'id': image['id'] + '_anno',\n",
    "        'image_id': image['id'],\n",
    "        'category_id': category_map[image['species']]\n",
    "    })\n",
    "\n",
    "    # Work on a copy: the records in `images` keep their 'species' key, so this cell can be\n",
    "    # re-run even after 'species' has been removed from the entries of final_images.\n",
    "    final_image = dict(image)\n",
    "    final_image['seq_num_frames'] = seq_id_to_num_frames[image['seq_id']]\n",
    "\n",
    "    # frame_num starts at 1\n",
    "    if final_image['frame_num'] > final_image['seq_num_frames']:\n",
    "        print(final_image)\n",
    "\n",
    "    final_images.append(final_image)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Only one image had frame_num > seq_num_frames..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(final_images)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(annotations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "final_images[1000]\n",
    "annotations[1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# The label is stored in `annotations` as category_id, so remove it from the image records.\n",
    "# pop() with a default keeps this cell safe to run more than once.\n",
    "for image in final_images:\n",
    "    image.pop('species', None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "final_images[1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# one {'id', 'name'} record per entry of category_map\n",
    "categories = [\n",
    "    {'id': category_id, 'name': category_name}\n",
    "    for category_name, category_id in category_map.items()\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "len(categories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "db = {\n",
    "    'info': {\n",
    "        'version': '20190530',\n",
    "        'description': 'Sul Ross University data, from folders Presidio001 and Summer2018.',\n",
    "        'contributor': 'Patricia Harveson, Sul Ross University. Database created by Siyu Yang',\n",
    "        'year': 2019,\n",
    "        'date_created': str(datetime.today())\n",
    "    },\n",
    "    'images': final_images,\n",
    "    'categories': categories,\n",
    "    'annotations': annotations\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Write the assembled database to disk as JSON.\n",
    "db_path = '/Users/siyuyang/Source/temp_data/CameraTrap/engagements/SulRoss/20190522/Database/sulross_20190530.json'\n",
    "with open(db_path, 'w') as out_file:\n",
    "    json.dump(db, out_file, indent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [tensorflow]",
   "language": "python",
   "name": "Python [tensorflow]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
